15. Mapping Schools
Data source: 2019-2020 School Locations (NYC Open Data) — https://data.cityofnewyork.us/Education/2019-2020-School-Locations/wg9x-4ke6
[6]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import geopandas as gpd
import folium
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Markdown as md
from nycschools import schools, exams, ui
from IPython.display import display, HTML
# display(HTML("<style>.container { width:100% !important; }</style>"))
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
[7]:
# Load the school demographics table through the nycschools helper and keep
# the distinct school identifiers (dbn) found in it.
demo = schools.load_school_demographics()
dbns = demo["dbn"].unique()
# last expression: show which columns are available
demo.columns
[7]:
Index(['dbn', 'beds', 'district', 'geo_district', 'boro', 'school_name',
'short_name', 'ay', 'year', 'total_enrollment',
'grade_3k_pk_half_day_full', 'grade_k', 'grade_1', 'grade_2', 'grade_3',
'grade_4', 'grade_5', 'grade_6', 'grade_7', 'grade_8', 'grade_9',
'grade_10', 'grade_11', 'grade_12', 'female_n', 'female_pct', 'male_n',
'male_pct', 'asian_n', 'asian_pct', 'black_n', 'black_pct',
'hispanic_n', 'hispanic_pct', 'multi_racial_n', 'multi_racial_pct',
'native_american_n', 'native_american_pct', 'white_n', 'white_pct',
'missing_race_ethnicity_data_n', 'missing_race_ethnicity_data_pct',
'swd_n', 'swd_pct', 'ell_n', 'ell_pct', 'poverty_n', 'poverty_pct',
'eni_pct', 'clean_name', 'zip'],
dtype='object')
[12]:
# --- School point locations (GeoJSON) -------------------------------------
geojsonurl = "https://data.cityofnewyork.us/resource/a3nt-yts4.geojson?$limit=1000000"
df = gpd.read_file(geojsonurl)
df = df.rename(columns={"xcoordinat": "x", "ycoordinat": "y"})
# coordinates are converted to numbers; anything unparseable becomes NaN
df["x"] = pd.to_numeric(df["x"], errors="coerce")
df["y"] = pd.to_numeric(df["y"], errors="coerce")
# Keep rows with a positive x (NaN fails the comparison, so it is dropped too).
# The explicit copy makes the `dbn` assignment below write to an independent
# frame; assigning onto the filtered slice raises SettingWithCopyWarning.
df = df[df["x"] > 0].copy()
df["dbn"] = df["ats_code"]
df = df[["x", "y", "dbn", "zip", "loc_name"]]

# --- School locations table (CSV) -----------------------------------------
url = "https://data.cityofnewyork.us/resource/wg9x-4ke6.csv?$limit=1000000"
loc2 = pd.read_csv(url)
loc2["dbn"] = loc2["system_code"]
# columns carried over from the locations table
cols = [
    'dbn',
    'administrative_district_code',
    'administrative_district_name',
    'beds',
    'borough_block_lot',
    'census_tract',
    'community_district',
    'community_district_1',
    'community_school_sup_name',
    'council_district',
    'fax_number',
    'fiscal_year',
    'geographical_district_code',
    'grades_final_text',
    'grades_text',
    'highschool_network',
    'highschool_network_location',
    'highschool_network_name',
    'latitude',
    'location_category_description',
    'location_code',
    'location_name',
    'location_type_description',
    'longitude',
    'managed_by_name',
    'nta',
    'nta_name',
    'open_date',
    'police_precinct',
    'primary_building_code',
    'principal_name',
    'principal_phone_number',
    'principal_title',
    'state_code',
    'status_descriptions']
loc2 = loc2[cols]

# attach the location attributes to every point row (points without a match keep NaN)
df = df.merge(loc2, on="dbn", how="left")
df.open_date
[12]:
0 1999-07-01T00:00:00.000
1 1898-07-01 00:00:00.000
2 1904-07-01T00:00:00.000
3 1972-07-01T00:00:00.000
4 2011-07-01T00:00:00.000
...
2099 1900-07-01T00:00:00.000
2100 2000-07-01T00:00:00.000
2101 1961-07-01T00:00:00.000
2102 2011-07-01T00:00:00.000
2103 1927-07-01T00:00:00.000
Name: open_date, Length: 2104, dtype: object
[11]:
# dbn sets of each source, kept for comparing their coverage
a = set(df.dbn)
b = set(loc2.dbn)
# `df` may already carry loc2's columns from the earlier left merge. Merging
# loc2 in again would duplicate every one of them with _x/_y suffixes and
# leave no plain `longitude`/`latitude` column, so drop those columns first.
# This also makes the cell safe to re-run.
shared = [c for c in loc2.columns if c != "dbn" and c in df.columns]
# right merge: keep every school listed in loc2
df = df.drop(columns=shared).merge(loc2, on="dbn", how="right")
df.columns
[11]:
Index(['x', 'y', 'dbn', 'zip', 'loc_name', 'administrative_district_code_x',
'administrative_district_name_x', 'beds_x', 'borough_block_lot_x',
'census_tract_x', 'community_district_x', 'community_district_1_x',
'community_school_sup_name_x', 'council_district_x', 'fax_number_x',
'fiscal_year_x', 'geographical_district_code_x', 'grades_final_text_x',
'grades_text_x', 'highschool_network_x',
'highschool_network_location_x', 'highschool_network_name_x',
'latitude_x', 'location_category_description_x', 'location_code_x',
'location_name_x', 'location_type_description_x', 'longitude_x',
'managed_by_name_x', 'nta_x', 'nta_name_x', 'open_date_x',
'police_precinct_x', 'primary_building_code_x', 'principal_name_x',
'principal_phone_number_x', 'principal_title_x', 'state_code_x',
'status_descriptions_x', 'administrative_district_code_y',
'administrative_district_name_y', 'beds_y', 'borough_block_lot_y',
'census_tract_y', 'community_district_y', 'community_district_1_y',
'community_school_sup_name_y', 'council_district_y', 'fax_number_y',
'fiscal_year_y', 'geographical_district_code_y', 'grades_final_text_y',
'grades_text_y', 'highschool_network_y',
'highschool_network_location_y', 'highschool_network_name_y',
'latitude_y', 'location_category_description_y', 'location_code_y',
'location_name_y', 'location_type_description_y', 'longitude_y',
'managed_by_name_y', 'nta_y', 'nta_name_y', 'open_date_y',
'police_precinct_y', 'primary_building_code_y', 'principal_name_y',
'principal_phone_number_y', 'principal_title_y', 'state_code_y',
'status_descriptions_y'],
dtype='object')
[ ]:
# Keep only the schools that also appear in the demographics data.
# A membership filter is used rather than an inner merge: `demo` holds several
# rows per dbn (one per school year), so merging would repeat every location
# row once per year and copy in demo's columns, which then collide with the
# later `join` against `demo`.
df = df[df.dbn.isin(demo.dbn)]
# demographic rows whose school has no location record
demo[~demo.dbn.isin(df.dbn)]
| dbn | beds | district | boro | school_name | short_name | ay | year | total_enrollment | grade_3k_pk_half_day_full | ... | missing_race_ethnicity_data_n | missing_race_ethnicity_data_pct | swd_n | swd_pct | ell_n | ell_pct | poverty_n | poverty_pct | eni_pct | clean_name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 9142 | 84X489 | 320900861084 | 84 | Bronx | South Bronx Classical Charter School III | PS 489 | 2016 | 2016-17 | 160 | 0 | ... | 0 | 0.000000 | 16 | 0.100000 | 30 | 0.188 | 142 | 0.887 | 0.783 | south bronx classical charter school iii |
| 9143 | 84X489 | 320900861084 | 84 | Bronx | South Bronx Classical Charter School III | PS 489 | 2017 | 2017-18 | 275 | 0 | ... | 1 | 0.003636 | 24 | 0.087273 | 56 | 0.204 | 242 | 0.880 | 0.884 | south bronx classical charter school iii |
| 9144 | 84X489 | 320900861084 | 84 | Bronx | South Bronx Classical Charter School III | PS 489 | 2018 | 2018-19 | 316 | 0 | ... | 1 | 0.003165 | 34 | 0.107595 | 66 | 0.209 | 281 | 0.889 | 0.877 | south bronx classical charter school iii |
| 9145 | 84X489 | 320900861084 | 84 | Bronx | South Bronx Classical Charter School III | PS 489 | 2019 | 2019-20 | 323 | 0 | ... | 0 | 0.000000 | 33 | 0.102167 | 59 | 0.183 | 291 | 0.901 | 0.875 | south bronx classical charter school iii |
| 9146 | 84X489 | 320900861084 | 84 | Bronx | South Bronx Classical Charter School III | PS 489 | 2020 | 2020-21 | 430 | 0 | ... | 1 | 0.002326 | 47 | 0.109302 | 91 | 0.212 | 401 | 0.933 | 0.865 | south bronx classical charter school iii |
| 9219 | 84X588 | 320900861122 | 84 | Bronx | South Bronx Classical Charter School IV | NA 588 | 2017 | 2017-18 | 129 | 0 | ... | 0 | 0.000000 | 14 | 0.108527 | 27 | 0.209 | 120 | 0.930 | 0.913 | south bronx classical charter school iv |
| 9220 | 84X588 | 320900861122 | 84 | Bronx | South Bronx Classical Charter School IV | PS 588 | 2018 | 2018-19 | 154 | 0 | ... | 0 | 0.000000 | 17 | 0.110390 | 44 | 0.286 | 144 | 0.935 | 0.930 | south bronx classical charter school iv |
| 9221 | 84X588 | 320900861122 | 84 | Bronx | South Bronx Classical Charter School IV | PS 588 | 2019 | 2019-20 | 164 | 0 | ... | 0 | 0.000000 | 25 | 0.152439 | 47 | 0.287 | 158 | 0.960 | 0.920 | south bronx classical charter school iv |
| 9222 | 84X588 | 320900861122 | 84 | Bronx | South Bronx Classical Charter School IV | PS 588 | 2020 | 2020-21 | 208 | 0 | ... | 3 | 0.014423 | 26 | 0.125000 | 54 | 0.260 | 169 | 0.813 | 0.852 | south bronx classical charter school iv |
9 rows × 49 columns
[5]:
# Read the school district boundaries straight from the GeoJSON export link.
districts_url = "https://data.cityofnewyork.us/api/geospatial/r8nu-ymqj?method=export&format=GeoJSON"
districts = gpd.read_file(districts_url)
# give the columns shorter names and make the district number numeric
districts.columns = ["district", "area", "length", "geometry"]
districts["district"] = pd.to_numeric(
    districts["district"], downcast="integer", errors="coerce"
)
# draw the districts, then `df` in red on the same axes
fig, ax = plt.subplots(figsize=(16, 16))
districts.plot(ax=ax)
# NOTE(review): `df` is built from plain column selections and merges, so this
# may be an ordinary pandas plot rather than a point map -- confirm.
df.plot(ax=ax, color="red")
[5]:
<AxesSubplot:>
[ ]:
# use `dbn` as the school identifier column name
df = df.rename(columns={"system_code": "dbn"})
# x / y now hold longitude / latitude
df = df.assign(x=df["longitude"], y=df["latitude"])
# discard rows that lack either coordinate
df = df.dropna(subset=["x", "y"])
[ ]:
# Build one point per school from x / y (set to longitude / latitude above)
# and wrap the table in a GeoDataFrame tagged as EPSG:4326.
geo = gpd.points_from_xy(x=df.x, y=df.y)
gdf = gpd.GeoDataFrame(df, geometry=geo, crs="EPSG:4326")
# interactive map of the school points (the cell's only displayed output)
gdf.explore()
Make this Notebook Trusted to load map: File -> Trust Notebook
[ ]:
# Join the demographic and geographic data sets on dbn.
# `DataFrame.join` raises when both frames share a column name and no suffix
# is given, and `demo` repeats some location columns (e.g. beds, zip). Only
# the demographic columns that `gdf` does not already have are joined in;
# gdf's own copy of any shared column is kept.
demo_by_dbn = demo.set_index("dbn")
demo_only = [c for c in demo_by_dbn.columns if c not in gdf.columns]
school_geo = gdf.set_index("dbn").join(demo_by_dbn[demo_only]).reset_index()

# read the district boundaries GeoJSON directly from the download link
districts = gpd.read_file("https://data.cityofnewyork.us/api/geospatial/r8nu-ymqj?method=export&format=GeoJSON")
# rename the columns and make the district number numeric
districts.columns = ['district', 'area', 'length', 'geometry']
districts.district = pd.to_numeric(districts.district, downcast='integer', errors='coerce')
school_geo.columns
Index(['dbn', 'fiscal_year', 'location_code', 'location_name', 'beds',
'managed_by_name', 'location_type_description',
'location_category_description', 'grades_text', 'grades_final_text',
'open_date', 'status_descriptions', 'primary_building_code',
'primary_address_line_1', 'state_code', 'x_coordinate', 'y_coordinate',
'longitude', 'latitude', 'community_district', 'council_district',
'census_tract', 'borough_block_lot', 'nta', 'nta_name',
'principal_name', 'principal_title', 'principal_phone_number',
'fax_number', 'geographical_district_code',
'administrative_district_code', 'administrative_district_name',
'community_school_sup_name', 'tier_3_support_location_name',
'tier_3_support_leader_name', 'tier_2_support_location_name',
'highschool_network_location', 'highschool_network_name',
'highschool_network', 'community_district_1', 'police_precinct', 'x',
'y', 'geometry', 'district', 'boro', 'school_name', 'short_name', 'ay',
'year', 'total_enrollment', 'grade_3k_pk_half_day_full', 'grade_k',
'grade_1', 'grade_2', 'grade_3', 'grade_4', 'grade_5', 'grade_6',
'grade_7', 'grade_8', 'grade_9', 'grade_10', 'grade_11', 'grade_12',
'female_n', 'female_pct', 'male_n', 'male_pct', 'asian_n', 'asian_pct',
'black_n', 'black_pct', 'hispanic_n', 'hispanic_pct', 'multi_racial_n',
'multi_racial_pct', 'native_american_n', 'native_american_pct',
'white_n', 'white_pct', 'missing_race_ethnicity_data_n',
'missing_race_ethnicity_data_pct', 'swd_n', 'swd_pct', 'ell_n',
'ell_pct', 'poverty_n', 'poverty_pct', 'eni_pct', 'clean_name'],
dtype='object')
[ ]:
# overlay the school coordinates (red) on the district outlines
fig, ax = plt.subplots(figsize=(16, 16))
districts.plot(ax=ax)
ax.scatter(x=school_geo["x"], y=school_geo["y"], color="red")
<matplotlib.collections.PathCollection at 0x7f3ae0c8c1f0>
[ ]:
# reproject the districts to the EPSG:4326 coordinate reference system
districts = districts.to_crs(epsg=4326)
explore_opts = {
    "column": "district",              # colour each district by its number (choropleth)
    "popup": False,
    "tooltip": "district",
    "tiles": "CartoDB positron",       # base map tiles
    "cmap": "tab20b",                  # matplotlib colormap
    "style_kwds": {"color": "black"},  # black outline
}
district_map = districts.explore(**explore_opts)
district_map
Make this Notebook Trusted to load map: File -> Trust Notebook
[ ]:
def school_pop(row):
    """Build the HTML popup snippet shown for one school.

    Parameters
    ----------
    row : object with attribute access (e.g. a DataFrame row)
        Must provide ``dbn``, ``district``, ``school_name``,
        ``total_enrollment`` and the fractions ``poverty_pct``,
        ``asian_pct``, ``black_pct``, ``hispanic_pct`` and ``white_pct``.

    Returns
    -------
    str
        A ``<div>`` holding one ``<br>``-separated line per field. The
        ``*_pct`` values are formatted as percentages with one decimal.
        ``dbn`` and ``school_name`` are HTML-escaped so characters such as
        ``&`` or ``<`` in a name cannot break the popup markup.
    """
    # imported locally so the block stays self-contained
    from html import escape

    popup = f"""
    <div style="min-width: 200px">
    dbn: {escape(str(row.dbn))}<br>
    district: {row.district}<br>
    name: {escape(str(row.school_name))}<br>
    size: {row.total_enrollment}<br>
    pct poverty: {row.poverty_pct:.1%}<br>
    pct Asian: {row.asian_pct:.1%}<br>
    pct Black: {row.black_pct:.1%}<br>
    pct Hispanic: {row.hispanic_pct:.1%}<br>
    pct White: {row.white_pct:.1%}
    </div>"""
    return popup
def dist_map(x):
    """Return the marker colour for a school based on its district number.

    Districts below 33 share one colour, 84 and 75 each get their own, and
    every other value falls into a fourth colour. Colours are entries 0-3 of
    the "tab10" colormap as wrapped by ``ui.hexmap``.
    """
    palette = ui.hexmap(plt.get_cmap("tab10"))
    if x < 33:
        slot = 0
    elif x == 84:
        # NOTE(review): 84 appears on charter-school rows in the demographics output -- confirm
        slot = 1
    elif x == 75:
        slot = 2
    else:
        slot = 3
    return palette(slot)
# per-row marker colour and popup HTML
school_geo["district_color"] = school_geo["district"].apply(dist_map)
school_geo["school info"] = school_geo.apply(school_pop, axis=1)
# same CRS as the district map the schools are drawn on
school_geo = school_geo.to_crs(epsg=4326)
# add the schools as a layer on the existing district map
school_geo.explore(
    m=district_map,
    tooltip=False,
    popup="school info",
    color="district_color",
)
Make this Notebook Trusted to load map: File -> Trust Notebook